{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "nrc = pd.read_excel(\"NRCEmotionLexicon.xlsx\")\n",
    "content = pd.read_csv(\"alltopcontent.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 205147 entries, 0 to 205146\n",
      "Data columns (total 3 columns):\n",
      "title           205146 non-null object\n",
      "total_shares    205147 non-null int64\n",
      "url             205147 non-null object\n",
      "dtypes: int64(1), object(2)\n",
      "memory usage: 4.7+ MB\n",
      "None\n"
     ]
    }
   ],
   "source": [
    "nrc = nrc[[\"English (en)\", \"Anger\", \"Anticipation\", \"Disgust\", \"Fear\", \"Joy\", \"Sadness\", \"Surprise\", \"Trust\"]]\n",
    "content = content[[\"title\", \"total_shares\", \"url\"]]\n",
    "print(content.info())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "#205147, 0, 205146\n",
    "\n",
    "import nltk\n",
    "from nltk.tokenize import RegexpTokenizer\n",
    "tokenizer = RegexpTokenizer(r'\\w+')\n",
    "\n",
    "\n",
    "def makeemotioncsvforrange(x,y):\n",
    "    originalframe = content.iloc[x:y]\n",
    "    angercolumn = []\n",
    "    anticipationcolumn = []\n",
    "    disgustcolumn = []\n",
    "    fearcolumn = []\n",
    "    joycolumn = []\n",
    "    sadnesscolumn =[]\n",
    "    surprisecolumn = []\n",
    "    trustcolumn = []\n",
    "    for i in range(x,y):\n",
    "        print(i)\n",
    "        string = str(content[\"title\"][i])\n",
    "        string = string.lower()\n",
    "        titlewords = tokenizer.tokenize(string)\n",
    "        lineanger = 0\n",
    "        lineanticipation = 0\n",
    "        linedisgust = 0\n",
    "        linefear = 0\n",
    "        linejoy = 0\n",
    "        linesadness = 0\n",
    "        linesurprise = 0\n",
    "        linetrust = 0\n",
    "        for word in titlewords:\n",
    "            for q in range(0,14182):\n",
    "                emoword = nrc['English (en)'][q]\n",
    "                emoword = str(emoword)\n",
    "                if word == emoword:\n",
    "                    lineanger += (nrc[\"Anger\"][q])\n",
    "                    lineanticipation += (nrc[\"Anticipation\"][q])\n",
    "                    linedisgust += (nrc[\"Disgust\"][q])\n",
    "                    linefear += (nrc[\"Fear\"][q])\n",
    "                    linejoy += (nrc[\"Joy\"][q])\n",
    "                    linesadness += (nrc[\"Sadness\"][q])\n",
    "                    linesurprise += (nrc[\"Surprise\"][q])\n",
    "                    linetrust += (nrc[\"Trust\"][q])\n",
    "        angercolumn.append(lineanger)\n",
    "        anticipationcolumn.append(lineanticipation)\n",
    "        disgustcolumn.append(linedisgust)\n",
    "        fearcolumn.append(linefear)\n",
    "        joycolumn.append(linejoy)\n",
    "        sadnesscolumn.append(linesadness)\n",
    "        surprisecolumn.append(linesurprise)\n",
    "        trustcolumn.append(linetrust)\n",
    "    originalframe['Anger'] = angercolumn\n",
    "    originalframe['Anticipation'] = anticipationcolumn\n",
    "    originalframe['Disgust'] = disgustcolumn\n",
    "    originalframe['Fear'] = fearcolumn\n",
    "    originalframe['Joy'] = joycolumn\n",
    "    originalframe['Sadness'] = sadnesscolumn\n",
    "    originalframe['Surpise'] = surprisecolumn\n",
    "    originalframe['Trust'] = trustcolumn\n",
    "    print(originalframe.info())\n",
    "    docname = \"/Users/tessmcnulty/emotioncsvs/\" + \"contentemotionrange\" + str(x) + \"to\" + str(y) + \".csv\"\n",
    "    originalframe.to_csv(docname)\n",
    "    \n",
    "\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "84500\n",
      "84501\n",
      "84502\n",
      "84503\n",
      "84504\n",
      "84505\n",
      "84506\n",
      "84507\n",
      "84508\n",
      "84509\n",
      "84510\n",
      "84511\n",
      "84512\n",
      "84513\n",
      "84514\n",
      "84515\n",
      "84516\n",
      "84517\n",
      "84518\n",
      "84519\n",
      "84520\n",
      "84521\n",
      "84522\n",
      "84523\n",
      "84524\n",
      "84525\n",
      "84526\n",
      "84527\n",
      "84528\n",
      "84529\n",
      "84530\n",
      "84531\n",
      "84532\n",
      "84533\n",
      "84534\n",
      "84535\n",
      "84536\n",
      "84537\n",
      "84538\n",
      "84539\n",
      "84540\n",
      "84541\n",
      "84542\n",
      "84543\n",
      "84544\n",
      "84545\n",
      "84546\n",
      "84547\n",
      "84548\n",
      "84549\n",
      "84550\n",
      "84551\n",
      "84552\n",
      "84553\n",
      "84554\n",
      "84555\n",
      "84556\n",
      "84557\n",
      "84558\n",
      "84559\n",
      "84560\n",
      "84561\n",
      "84562\n",
      "84563\n",
      "84564\n",
      "84565\n",
      "84566\n",
      "84567\n",
      "84568\n",
      "84569\n",
      "84570\n",
      "84571\n",
      "84572\n",
      "84573\n",
      "84574\n",
      "84575\n",
      "84576\n",
      "84577\n",
      "84578\n",
      "84579\n",
      "84580\n",
      "84581\n",
      "84582\n",
      "84583\n",
      "84584\n",
      "84585\n",
      "84586\n",
      "84587\n",
      "84588\n",
      "84589\n",
      "84590\n",
      "84591\n",
      "84592\n",
      "84593\n",
      "84594\n",
      "84595\n",
      "84596\n",
      "84597\n",
      "84598\n",
      "84599\n",
      "84600\n",
      "84601\n",
      "84602\n",
      "84603\n",
      "84604\n",
      "84605\n",
      "84606\n",
      "84607\n",
      "84608\n",
      "84609\n",
      "84610\n",
      "84611\n",
      "84612\n",
      "84613\n",
      "84614\n",
      "84615\n",
      "84616\n",
      "84617\n",
      "84618\n",
      "84619\n",
      "84620\n",
      "84621\n",
      "84622\n",
      "84623\n",
      "84624\n",
      "84625\n",
      "84626\n",
      "84627\n",
      "84628\n",
      "84629\n",
      "84630\n",
      "84631\n",
      "84632\n",
      "84633\n",
      "84634\n",
      "84635\n",
      "84636\n",
      "84637\n",
      "84638\n",
      "84639\n",
      "84640\n",
      "84641\n",
      "84642\n",
      "84643\n",
      "84644\n",
      "84645\n",
      "84646\n",
      "84647\n",
      "84648\n",
      "84649\n",
      "84650\n",
      "84651\n",
      "84652\n",
      "84653\n",
      "84654\n",
      "84655\n",
      "84656\n",
      "84657\n",
      "84658\n",
      "84659\n",
      "84660\n",
      "84661\n",
      "84662\n",
      "84663\n",
      "84664\n",
      "84665\n",
      "84666\n",
      "84667\n",
      "84668\n",
      "84669\n",
      "84670\n",
      "84671\n",
      "84672\n",
      "84673\n",
      "84674\n",
      "84675\n",
      "84676\n",
      "84677\n",
      "84678\n",
      "84679\n",
      "84680\n",
      "84681\n",
      "84682\n",
      "84683\n",
      "84684\n",
      "84685\n",
      "84686\n",
      "84687\n",
      "84688\n",
      "84689\n",
      "84690\n",
      "84691\n",
      "84692\n",
      "84693\n",
      "84694\n",
      "84695\n",
      "84696\n",
      "84697\n",
      "84698\n",
      "84699\n",
      "84700\n",
      "84701\n",
      "84702\n",
      "84703\n",
      "84704\n",
      "84705\n",
      "84706\n",
      "84707\n",
      "84708\n",
      "84709\n",
      "84710\n",
      "84711\n",
      "84712\n",
      "84713\n",
      "84714\n",
      "84715\n",
      "84716\n",
      "84717\n",
      "84718\n",
      "84719\n",
      "84720\n",
      "84721\n",
      "84722\n",
      "84723\n",
      "84724\n",
      "84725\n",
      "84726\n",
      "84727\n",
      "84728\n",
      "84729\n",
      "84730\n",
      "84731\n",
      "84732\n",
      "84733\n",
      "84734\n",
      "84735\n",
      "84736\n",
      "84737\n",
      "84738\n",
      "84739\n",
      "84740\n",
      "84741\n",
      "84742\n",
      "84743\n",
      "84744\n",
      "84745\n",
      "84746\n",
      "84747\n",
      "84748\n",
      "84749\n",
      "84750\n",
      "84751\n",
      "84752\n",
      "84753\n",
      "84754\n",
      "84755\n",
      "84756\n",
      "84757\n",
      "84758\n",
      "84759\n",
      "84760\n",
      "84761\n",
      "84762\n",
      "84763\n",
      "84764\n",
      "84765\n",
      "84766\n",
      "84767\n",
      "84768\n",
      "84769\n",
      "84770\n",
      "84771\n",
      "84772\n",
      "84773\n",
      "84774\n",
      "84775\n",
      "84776\n",
      "84777\n",
      "84778\n",
      "84779\n",
      "84780\n",
      "84781\n",
      "84782\n",
      "84783\n",
      "84784\n",
      "84785\n",
      "84786\n",
      "84787\n",
      "84788\n",
      "84789\n",
      "84790\n",
      "84791\n",
      "84792\n",
      "84793\n",
      "84794\n",
      "84795\n",
      "84796\n",
      "84797\n",
      "84798\n",
      "84799\n",
      "84800\n",
      "84801\n",
      "84802\n",
      "84803\n",
      "84804\n",
      "84805\n",
      "84806\n",
      "84807\n",
      "84808\n",
      "84809\n",
      "84810\n",
      "84811\n",
      "84812\n",
      "84813\n",
      "84814\n",
      "84815\n",
      "84816\n",
      "84817\n",
      "84818\n",
      "84819\n",
      "84820\n",
      "84821\n",
      "84822\n",
      "84823\n",
      "84824\n",
      "84825\n",
      "84826\n",
      "84827\n",
      "84828\n",
      "84829\n",
      "84830\n",
      "84831\n",
      "84832\n",
      "84833\n",
      "84834\n",
      "84835\n",
      "84836\n",
      "84837\n",
      "84838\n",
      "84839\n",
      "84840\n",
      "84841\n",
      "84842\n",
      "84843\n",
      "84844\n",
      "84845\n",
      "84846\n",
      "84847\n",
      "84848\n",
      "84849\n",
      "84850\n",
      "84851\n",
      "84852\n",
      "84853\n",
      "84854\n",
      "84855\n",
      "84856\n",
      "84857\n",
      "84858\n",
      "84859\n",
      "84860\n",
      "84861\n",
      "84862\n",
      "84863\n",
      "84864\n",
      "84865\n",
      "84866\n",
      "84867\n",
      "84868\n",
      "84869\n",
      "84870\n",
      "84871\n",
      "84872\n",
      "84873\n",
      "84874\n",
      "84875\n",
      "84876\n",
      "84877\n",
      "84878\n",
      "84879\n",
      "84880\n",
      "84881\n",
      "84882\n",
      "84883\n",
      "84884\n",
      "84885\n",
      "84886\n",
      "84887\n",
      "84888\n",
      "84889\n",
      "84890\n",
      "84891\n",
      "84892\n",
      "84893\n",
      "84894\n",
      "84895\n",
      "84896\n",
      "84897\n",
      "84898\n",
      "84899\n",
      "84900\n",
      "84901\n",
      "84902\n",
      "84903\n",
      "84904\n",
      "84905\n",
      "84906\n",
      "84907\n",
      "84908\n",
      "84909\n",
      "84910\n",
      "84911\n",
      "84912\n",
      "84913\n",
      "84914\n",
      "84915\n",
      "84916\n",
      "84917\n",
      "84918\n",
      "84919\n",
      "84920\n",
      "84921\n",
      "84922\n",
      "84923\n",
      "84924\n",
      "84925\n",
      "84926\n",
      "84927\n",
      "84928\n",
      "84929\n",
      "84930\n",
      "84931\n",
      "84932\n",
      "84933\n",
      "84934\n",
      "84935\n",
      "84936\n",
      "84937\n",
      "84938\n",
      "84939\n",
      "84940\n",
      "84941\n",
      "84942\n",
      "84943\n",
      "84944\n",
      "84945\n",
      "84946\n",
      "84947\n",
      "84948\n",
      "84949\n",
      "84950\n",
      "84951\n",
      "84952\n",
      "84953\n",
      "84954\n",
      "84955\n",
      "84956\n",
      "84957\n",
      "84958\n",
      "84959\n",
      "84960\n",
      "84961\n",
      "84962\n",
      "84963\n",
      "84964\n",
      "84965\n",
      "84966\n",
      "84967\n",
      "84968\n",
      "84969\n",
      "84970\n",
      "84971\n",
      "84972\n",
      "84973\n",
      "84974\n",
      "84975\n",
      "84976\n",
      "84977\n",
      "84978\n",
      "84979\n",
      "84980\n",
      "84981\n",
      "84982\n",
      "84983\n",
      "84984\n",
      "84985\n",
      "84986\n",
      "84987\n",
      "84988\n",
      "84989\n",
      "84990\n",
      "84991\n",
      "84992\n",
      "84993\n",
      "84994\n",
      "84995\n",
      "84996\n",
      "84997\n",
      "84998\n",
      "84999\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/tessmcnulty/anaconda/lib/python3.6/site-packages/ipykernel_launcher.py:52: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "/Users/tessmcnulty/anaconda/lib/python3.6/site-packages/ipykernel_launcher.py:53: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "/Users/tessmcnulty/anaconda/lib/python3.6/site-packages/ipykernel_launcher.py:54: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "/Users/tessmcnulty/anaconda/lib/python3.6/site-packages/ipykernel_launcher.py:55: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "/Users/tessmcnulty/anaconda/lib/python3.6/site-packages/ipykernel_launcher.py:56: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 500 entries, 84500 to 84999\n",
      "Data columns (total 11 columns):\n",
      "title           500 non-null object\n",
      "total_shares    500 non-null int64\n",
      "url             500 non-null object\n",
      "Anger           500 non-null int64\n",
      "Anticipation    500 non-null int64\n",
      "Disgust         500 non-null int64\n",
      "Fear            500 non-null int64\n",
      "Joy             500 non-null int64\n",
      "Sadness         500 non-null int64\n",
      "Surpise         500 non-null int64\n",
      "Trust           500 non-null int64\n",
      "dtypes: int64(9), object(2)\n",
      "memory usage: 43.1+ KB\n",
      "None\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/tessmcnulty/anaconda/lib/python3.6/site-packages/ipykernel_launcher.py:57: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "/Users/tessmcnulty/anaconda/lib/python3.6/site-packages/ipykernel_launcher.py:58: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "/Users/tessmcnulty/anaconda/lib/python3.6/site-packages/ipykernel_launcher.py:59: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n"
     ]
    }
   ],
   "source": [
    "makeemotioncsvforrange(84500,85000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:root] *",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
